From 97a442456383617b194e76daca5c1d9fb45c6962 Mon Sep 17 00:00:00 2001 From: "akw27@labyrinth.cl.cam.ac.uk" Date: Mon, 22 Nov 2004 17:50:27 +0000 Subject: [PATCH] bitkeeper revision 1.1159.184.1 (41a226e3Y9RHKGkAbgRWDb4t7yHQPQ) Initial push of the block tap code. This is a driver to let you intercept block requests and/or implement block devices in user space, all in an isolated VM. --- .rootkeys | 6 + linux-2.6.9-xen-sparse/arch/xen/Kconfig | 24 + .../arch/xen/configs/xen0_defconfig | 2 + .../arch/xen/configs/xenU_defconfig | 2 + linux-2.6.9-xen-sparse/drivers/xen/Makefile | 1 + .../drivers/xen/blkback/blkback.c | 42 +- .../drivers/xen/blkfront/blkfront.c | 3 +- .../drivers/xen/blktap/Makefile | 3 + .../drivers/xen/blktap/blktap.c | 86 +++ .../drivers/xen/blktap/blktap.h | 254 +++++++++ .../drivers/xen/blktap/blktap_controlmsg.c | 358 ++++++++++++ .../drivers/xen/blktap/blktap_datapath.c | 517 ++++++++++++++++++ .../drivers/xen/blktap/blktap_userdev.c | 243 ++++++++ 13 files changed, 1536 insertions(+), 5 deletions(-) create mode 100644 linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile create mode 100644 linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c create mode 100644 linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h create mode 100644 linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c create mode 100644 linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c create mode 100644 linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c diff --git a/.rootkeys b/.rootkeys index aa50d08537..09e1b67be9 100644 --- a/.rootkeys +++ b/.rootkeys @@ -193,6 +193,12 @@ 40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.9-xen-sparse/drivers/xen/blkfront/blkfront.c 40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.9-xen-sparse/drivers/xen/blkfront/block.h 40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.9-xen-sparse/drivers/xen/blkfront/vbd.c +41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile +41a226e0VeZA1N8tbU6nvJ3OxUcJmw 
linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c +41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h +41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c +41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c +41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c 40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.9-xen-sparse/drivers/xen/console/Makefile 3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.9-xen-sparse/drivers/xen/console/console.c 40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.9-xen-sparse/drivers/xen/evtchn/Makefile diff --git a/linux-2.6.9-xen-sparse/arch/xen/Kconfig b/linux-2.6.9-xen-sparse/arch/xen/Kconfig index a9675229ae..d520aefe17 100644 --- a/linux-2.6.9-xen-sparse/arch/xen/Kconfig +++ b/linux-2.6.9-xen-sparse/arch/xen/Kconfig @@ -49,6 +49,20 @@ config XEN_BLKDEV_BACKEND block devices to other guests via a high-performance shared-memory interface. +if XEN_BLKDEV_BACKEND +config XEN_BLKDEV_TAP_BE + bool "Block Tap support for backend driver (DANGEROUS)" + default n + help + If you intend to use the block tap driver, the backend domain will + not know the domain id of the real frontend, and so will not be able + to map its data pages. This modifies the backend to attempt to map + from both the tap domain and the real frontend. This presents a + security risk, and so should ONLY be used for development + with the blktap. This option will be removed as the block drivers are + modified to use grant tables. +endif + config XEN_NETDEV_BACKEND bool "Network-device backend driver" default y if XEN_PHYSDEV_ACCESS @@ -94,6 +108,16 @@ config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER enabled; then you must say N here. endif +config XEN_BLKDEV_TAP + bool "Block device tap driver" + default n + help + This driver allows a VM to interact on block device channels + to other VMs. 
Block messages may be passed through or redirected + to a character device, allowing device prototyping in application + space. Odds are that you want to say N here. + + config XEN_WRITABLE_PAGETABLES bool default y diff --git a/linux-2.6.9-xen-sparse/arch/xen/configs/xen0_defconfig b/linux-2.6.9-xen-sparse/arch/xen/configs/xen0_defconfig index 1532ab3dfb..1455a24d5f 100644 --- a/linux-2.6.9-xen-sparse/arch/xen/configs/xen0_defconfig +++ b/linux-2.6.9-xen-sparse/arch/xen/configs/xen0_defconfig @@ -13,9 +13,11 @@ CONFIG_NO_IDLE_HZ=y CONFIG_XEN_PRIVILEGED_GUEST=y CONFIG_XEN_PHYSDEV_ACCESS=y CONFIG_XEN_BLKDEV_BACKEND=y +# CONFIG_XEN_BLKDEV_TAP_BE is not set CONFIG_XEN_NETDEV_BACKEND=y CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y +# CONFIG_XEN_BLKDEV_TAP is not set # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set CONFIG_XEN_WRITABLE_PAGETABLES=y CONFIG_XEN_SCRUB_PAGES=y diff --git a/linux-2.6.9-xen-sparse/arch/xen/configs/xenU_defconfig b/linux-2.6.9-xen-sparse/arch/xen/configs/xenU_defconfig index 24c57a3f50..50d61f7940 100644 --- a/linux-2.6.9-xen-sparse/arch/xen/configs/xenU_defconfig +++ b/linux-2.6.9-xen-sparse/arch/xen/configs/xenU_defconfig @@ -13,9 +13,11 @@ CONFIG_NO_IDLE_HZ=y # CONFIG_XEN_PRIVILEGED_GUEST is not set # CONFIG_XEN_PHYSDEV_ACCESS is not set # CONFIG_XEN_BLKDEV_BACKEND is not set +# CONFIG_XEN_BLKDEV_TAP_BE is not set # CONFIG_XEN_NETDEV_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y +# CONFIG_XEN_BLKDEV_TAP is not set # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set CONFIG_XEN_WRITABLE_PAGETABLES=y CONFIG_XEN_SCRUB_PAGES=y diff --git a/linux-2.6.9-xen-sparse/drivers/xen/Makefile b/linux-2.6.9-xen-sparse/drivers/xen/Makefile index e181171a61..8728d6a725 100644 --- a/linux-2.6.9-xen-sparse/drivers/xen/Makefile +++ b/linux-2.6.9-xen-sparse/drivers/xen/Makefile @@ -9,4 +9,5 @@ obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ 
obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/ obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/ +obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6.9-xen-sparse/drivers/xen/blkback/blkback.c index 6d201022d2..ce5b010375 100644 --- a/linux-2.6.9-xen-sparse/drivers/xen/blkback/blkback.c +++ b/linux-2.6.9-xen-sparse/drivers/xen/blkback/blkback.c @@ -68,6 +68,19 @@ static PEND_RING_IDX pending_prod, pending_cons; static kmem_cache_t *buffer_head_cachep; #endif +#ifdef CONFIG_XEN_BLKDEV_TAP_BE +/* + * If the tap driver is used, we may get pages belonging to either the tap + * or (more likely) the real frontend. The backend must specify which domain + * a given page belongs to in update_va_mapping though. For the moment, + * we pass in the domid of the real frontend in PROBE messages and store + * this value in alt_dom. Then on mapping, we try both. This is a Guiness + * book of records-calibre grim hack, and represents a bit of a security risk. + * Grant tables will soon solve the problem though! + */ +static domid_t alt_dom = 0; +#endif + static int do_block_io_op(blkif_t *blkif, int max_to_do); static void dispatch_probe(blkif_t *blkif, blkif_request_t *req); static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req); @@ -323,12 +336,27 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req) (blkif_last_sect(req->frame_and_sects[0]) != 7) ) goto out; +#ifdef CONFIG_XEN_BLKDEV_TAP_BE + /* Grab the real frontend out of the probe message. */ + alt_dom = (domid_t)req->frame_and_sects[1]; +#endif + if ( HYPERVISOR_update_va_mapping_otherdomain( MMAP_VADDR(pending_idx, 0) >> PAGE_SHIFT, (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL }, - 0, blkif->domid) ) + 0, blkif->domid) ) { +#ifdef CONFIG_XEN_BLKDEV_TAP_BE + /* That didn't work. Try alt_dom. 
*/ + if ( HYPERVISOR_update_va_mapping_otherdomain( + MMAP_VADDR(pending_idx, 0) >> PAGE_SHIFT, + (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL }, + 0, alt_dom) ) + goto out; +#else goto out; - +#endif + } + rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), PAGE_SIZE / sizeof(vdisk_t)); @@ -411,8 +439,11 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) mcl[i].args[0] = MMAP_VADDR(pending_idx, i) >> PAGE_SHIFT; mcl[i].args[1] = (phys_seg[i].buffer & PAGE_MASK) | remap_prot; mcl[i].args[2] = 0; +#ifdef CONFIG_XEN_BLKDEV_TAP_BE + mcl[i].args[3] = (alt_dom != 0) ? alt_dom : blkif->domid; +#else mcl[i].args[3] = blkif->domid; - +#endif phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = FOREIGN_FRAME(phys_seg[i].buffer >> PAGE_SHIFT); } @@ -579,7 +610,10 @@ static int __init blkif_init(void) #endif blkif_ctrlif_init(); - + +#ifdef CONFIG_XEN_BLKDEV_TAP_BE + printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n"); +#endif return 0; } diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6.9-xen-sparse/drivers/xen/blkfront/blkfront.c index 3384099641..2f79208321 100644 --- a/linux-2.6.9-xen-sparse/drivers/xen/blkfront/blkfront.c +++ b/linux-2.6.9-xen-sparse/drivers/xen/blkfront/blkfront.c @@ -1262,7 +1262,8 @@ static void blkif_status(blkif_fe_interface_status_t *status) { if ( status->handle != blkif_handle ) { - WPRINTK(" Invalid blkif: handle=%u", status->handle); + WPRINTK(" Invalid blkif: handle=%u\n", status->handle); + unexpected(status); return; } diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile b/linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile new file mode 100644 index 0000000000..80b7ca0627 --- /dev/null +++ b/linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile @@ -0,0 +1,3 @@ + +obj-y := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o + diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c 
b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c new file mode 100644 index 0000000000..5e7d47c58f --- /dev/null +++ b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c @@ -0,0 +1,86 @@ +/****************************************************************************** + * blktap.c + * + * XenLinux virtual block-device tap. + * + * Copyright (c) 2004, Andrew Warfield + * + * Based on the original split block driver: + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge + * Copyright (c) 2004, Christian Limpach + * + * Note that unlike the split block driver code, this driver has been developed + * strictly for Linux 2.6 + */ + +#include "blktap.h" + +int __init xlblk_init(void) +{ + ctrl_msg_t cmsg; + blkif_fe_driver_status_t fe_st; + blkif_be_driver_status_t be_st; + + printk(KERN_INFO "Initialising Xen block tap device\n"); + + DPRINTK(" tap - Backend connection init:\n"); + + + (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, + CALLBACK_IN_BLOCKING_CONTEXT); + + /* Send a driver-UP notification to the domain controller. */ + cmsg.type = CMSG_BLKIF_FE; + cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS; + cmsg.length = sizeof(blkif_fe_driver_status_t); + fe_st.status = BLKIF_DRIVER_STATUS_UP; + memcpy(cmsg.msg, &fe_st, sizeof(fe_st)); + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); + + DPRINTK(" tap - Frontend connection init:\n"); + + active_reqs_init(); + + ptfe_blkif.status = DISCONNECTED; + + (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, + CALLBACK_IN_BLOCKING_CONTEXT); + + /* Send a driver-UP notification to the domain controller. 
*/ + cmsg.type = CMSG_BLKIF_BE; + cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS; + cmsg.length = sizeof(blkif_be_driver_status_t); + be_st.status = BLKIF_DRIVER_STATUS_UP; + memcpy(cmsg.msg, &be_st, sizeof(be_st)); + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); + + DPRINTK(" tap - Userland channel init:\n"); + + blktap_init(); + + DPRINTK("Blkif tap device initialized.\n"); + + return 0; +} + +void blkdev_suspend(void) +{ +} + +void blkdev_resume(void) +{ + ctrl_msg_t cmsg; + blkif_fe_driver_status_t st; + + /* Send a driver-UP notification to the domain controller. */ + cmsg.type = CMSG_BLKIF_FE; + cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS; + cmsg.length = sizeof(blkif_fe_driver_status_t); + st.status = BLKIF_DRIVER_STATUS_UP; + memcpy(cmsg.msg, &st, sizeof(st)); + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); +} + + +__initcall(xlblk_init); diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h new file mode 100644 index 0000000000..7e5d73ddf7 --- /dev/null +++ b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h @@ -0,0 +1,254 @@ +/* + * blktap.h + * + * Interfaces for the Xen block tap driver. + * + * (c) 2004, Andrew Warfield, University of Cambridge + * + */ + +#ifndef __BLKTAP_H__ +#define __BLKTAP_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* -------[ debug / pretty printing ]--------------------------------- */ + +#if 0 +#define ASSERT(_p) \ + if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) +#else +#define ASSERT(_p) ((void)0) +#define DPRINTK(_f, _a...) ((void)0) +#endif + +#define WPRINTK(fmt, args...) 
printk(KERN_WARNING "blk_tap: " fmt, ##args) + +/* -------[ connection / request tracking ]--------------------------- */ + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +#define VMALLOC_VMADDR(x) ((unsigned long)(x)) +#endif + +extern spinlock_t blkif_io_lock; + +typedef struct blkif_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned long shmem_frame; + unsigned int evtchn; + int irq; + /* Comms information. */ + blkif_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */ + BLKIF_RING_IDX blk_req_cons; /* Request consumer. */ + BLKIF_RING_IDX blk_resp_prod; /* Private version of resp. producer. */ + + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; + /* + * DISCONNECT response is deferred until pending requests are ack'ed. + * We therefore need to store the id from the original request. + */ u8 disconnect_rspid; + struct blkif_st *hash_next; + struct list_head blkdev_list; + spinlock_t blk_ring_lock; + atomic_t refcnt; + + struct work_struct work; +} blkif_t; + +typedef struct { + blkif_t *blkif; + unsigned long id; + int nr_pages; + unsigned long mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned long virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int next_free; +} active_req_t; + + +/* -------[ block ring structs ]-------------------------------------- */ + +/* Types of ring. */ +#define BLKIF_REQ_RING_TYPE 1 +#define BLKIF_RSP_RING_TYPE 2 + +/* generic ring struct. */ +typedef struct blkif_generic_ring_struct { + int type; +} blkif_generic_ring_t; + +/* A requestor's view of a ring. 
*/ +typedef struct blkif_req_ring_struct { + + int type; /* Will be BLKIF_REQ_RING_TYPE */ + BLKIF_RING_IDX req_prod; /* PRIVATE req_prod index */ + BLKIF_RING_IDX rsp_cons; /* Response consumer index */ + blkif_ring_t *ring; /* Pointer to shared ring struct */ + +} blkif_req_ring_t; + +#define BLKIF_REQ_RING_INIT { BLKIF_REQ_RING_TYPE, 0, 0, 0 } + +/* A responder's view of a ring. */ +typedef struct blkif_rsp_ring_struct { + + int type; + BLKIF_RING_IDX rsp_prod; /* PRIVATE rsp_prod index */ + BLKIF_RING_IDX req_cons; /* Request consumer index */ + blkif_ring_t *ring; /* Pointer to shared ring struct */ + +} blkif_rsp_ring_t; + +#define BLKIF_RSP_RING_INIT = { BLKIF_RSP_RING_TYPE, 0, 0, 0 } + +#define RING(a) (blkif_generic_ring_t *)(a) + +inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring); + + +/* -------[ interposition -> character device interface ]------------- */ + +/* /dev/xen/blktap resides at device number major=10, minor=200 */ +#define BLKTAP_MINOR 202 + +/* size of the extra VMA area to map in attached pages. 
*/ +#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE + +/* blktap IOCTLs: */ +#define BLKTAP_IOCTL_KICK_FE 1 +#define BLKTAP_IOCTL_KICK_BE 2 +#define BLKTAP_IOCTL_SETMODE 3 + +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ +#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ +#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 +#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 +#define BLKTAP_MODE_COPY_FE 0x00000004 +#define BLKTAP_MODE_COPY_BE 0x00000008 +#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010 +#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020 + +#define BLKTAP_MODE_INTERPOSE \ + (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) + +#define BLKTAP_MODE_COPY_BOTH \ + (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE) + +#define BLKTAP_MODE_COPY_BOTH_PAGES \ + (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES) + +static inline int BLKTAP_MODE_VALID(unsigned long arg) +{ + return ( + ( arg == BLKTAP_MODE_PASSTHROUGH ) || + ( arg == BLKTAP_MODE_INTERCEPT_FE ) || + ( arg == BLKTAP_MODE_INTERCEPT_BE ) || + ( arg == BLKTAP_MODE_INTERPOSE ) || + ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) || + ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) || + ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH ) + ); +} + + + +/* -------[ Mappings to User VMA ]------------------------------------ */ +#define MAX_PENDING_REQS 64 +#define BATCH_PER_DOMAIN 16 +extern struct vm_area_struct *blktap_vma; + +/* The following are from blkback.c and should probably be put in a + * header and included from there. + * The mmap area described here is where attached data pages eill be mapped. 
+ */ + +extern unsigned long mmap_vstart; +#define MMAP_PAGES_PER_REQUEST \ + (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) +#define MMAP_PAGES \ + (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) +#define MMAP_VADDR(_req,_seg) \ + (mmap_vstart + \ + ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ + ((_seg) * PAGE_SIZE)) + +/* immediately before the mmap area, we have a bunch of pages reserved + * for shared memory rings. + */ + +#define RING_PAGES 128 +extern unsigned long rings_vstart; + +/* -------[ Here be globals ]----------------------------------------- */ + +extern unsigned long blktap_mode; + + +/* blkif struct, containing ring to FE domain */ +extern blkif_t ptfe_blkif; + +/* Connection to a single backend domain. */ +extern blkif_ring_t *blk_ptbe_ring; /* Ring from the PT to the BE dom */ +extern BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */ +extern BLKIF_RING_IDX ptbe_req_prod; /* Private request producer. */ + +/* Rings up to user space. */ +extern blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT; +extern blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT; + +/* Event channel to backend domain. */ +extern unsigned int blkif_ptbe_evtchn; + +/* User ring status... this will soon vanish into a ring struct. */ +extern unsigned long blktap_ring_ok; + +/* -------[ ...and function prototypes. ]----------------------------- */ + +/* init function for character device interface. */ +int blktap_init(void); + +/* interfaces to the char driver, passing messages to and from apps. 
*/ +void blktap_kick_user(void); +int blktap_write_to_ring(blkif_request_t *req); + + +/* user ring access functions: */ +int blktap_write_fe_ring(blkif_request_t *req); +int blktap_write_be_ring(blkif_response_t *rsp); +int blktap_read_fe_ring(void); +int blktap_read_be_ring(void); + +/* and the helpers they call: */ +inline int write_resp_to_fe_ring(blkif_response_t *rsp); +inline void kick_fe_domain(void); + +inline int write_req_to_be_ring(blkif_request_t *req); +inline void kick_be_domain(void); + +/* Interrupt handlers. */ +irqreturn_t blkif_ptbe_int(int irq, void *dev_id, + struct pt_regs *ptregs); +irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs); + +/* Control message receiver. */ +extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id); + +#define __BLKINT_H__ +#endif diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c new file mode 100644 index 0000000000..a3d485a6f3 --- /dev/null +++ b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c @@ -0,0 +1,358 @@ +/****************************************************************************** + * blktap_controlmsg.c + * + * XenLinux virtual block-device tap. + * Control interfaces to the frontend and backend drivers. 
+ * + * Copyright (c) 2004, Andrew Warfield + * + */ + +#include "blktap.h" + +#define BLKIF_STATE_CLOSED 0 +#define BLKIF_STATE_DISCONNECTED 1 +#define BLKIF_STATE_CONNECTED 2 + +static char *blkif_state_name[] = { + [BLKIF_STATE_CLOSED] = "closed", + [BLKIF_STATE_DISCONNECTED] = "disconnected", + [BLKIF_STATE_CONNECTED] = "connected", +}; + +static char * blkif_status_name[] = { + [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", + [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", + [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", + [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", +}; +static unsigned int blkif_pt_state = BLKIF_STATE_CLOSED; +static unsigned blkif_ptbe_irq; +unsigned int blkif_ptbe_evtchn; + +/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/ + + +void blkif_ptfe_create(blkif_be_create_t *create) +{ + blkif_t *blkif; + domid_t domid = create->domid; + unsigned int handle = create->blkif_handle; + + + /* May want to store info on the connecting domain here. */ + + DPRINTK("PT got BE_CREATE\n"); + blkif = &ptfe_blkif; /* for convenience if the hash is readded later. */ + + /* blkif struct init code from blkback.c */ + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + blkif->handle = handle; + blkif->status = DISCONNECTED; + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 0); + + create->status = BLKIF_BE_STATUS_OKAY; +} + + +void blkif_ptfe_destroy(blkif_be_destroy_t *destroy) +{ + /* Clear anything that we initialized above. 
*/ + + DPRINTK("PT got BE_DESTROY\n"); + destroy->status = BLKIF_BE_STATUS_OKAY; +} + +void blkif_ptfe_connect(blkif_be_connect_t *connect) +{ + domid_t domid = connect->domid; + /*unsigned int handle = connect->blkif_handle;*/ + unsigned int evtchn = connect->evtchn; + unsigned long shmem_frame = connect->shmem_frame; + struct vm_struct *vma; + pgprot_t prot; + int error; + blkif_t *blkif; + + DPRINTK("PT got BE_CONNECT\n"); + + blkif = &ptfe_blkif; /* for convenience if the hash is readded later. */ + + if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) + { + connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; + return; + } + + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED); + error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), + shmem_frame<status = BLKIF_BE_STATUS_OUT_OF_MEMORY; + else if ( error == -EFAULT ) { + connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; + WPRINTK("BE_CONNECT: MAPPING error!\n"); + } + else + connect->status = BLKIF_BE_STATUS_ERROR; + vfree(vma->addr); + return; + } + + if ( blkif->status != DISCONNECTED ) + { + connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; + vfree(vma->addr); + return; + } + + blkif->evtchn = evtchn; + blkif->irq = bind_evtchn_to_irq(evtchn); + blkif->shmem_frame = shmem_frame; + blkif->blk_ring_base = (blkif_ring_t *)vma->addr; + blkif->status = CONNECTED; + /*blkif_get(blkif);*/ + + request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif); + + connect->status = BLKIF_BE_STATUS_OKAY; +} + +void blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect) +{ + /* + * don't actually set the passthrough to disconnected. + * We just act as a pipe, and defer to the real ends to handle things like + * recovery. + */ + + DPRINTK("PT got BE_DISCONNECT\n"); + + disconnect->status = BLKIF_BE_STATUS_OKAY; + return; +} + +/*-----[ Control Messages to/from Backend VM ]----------------------------*/ + +/* Tell the controller to bring up the interface. 
*/ +static void blkif_ptbe_send_interface_connect(void) +{ + ctrl_msg_t cmsg = { + .type = CMSG_BLKIF_FE, + .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT, + .length = sizeof(blkif_fe_interface_connect_t), + }; + blkif_fe_interface_connect_t *msg = (void*)cmsg.msg; + msg->handle = 0; + msg->shmem_frame = virt_to_machine(blk_ptbe_ring) >> PAGE_SHIFT; + + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); +} + +static void blkif_ptbe_close(void) +{ +} + +/* Move from CLOSED to DISCONNECTED state. */ +static void blkif_ptbe_disconnect(void) +{ + blk_ptbe_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL); + blk_ptbe_ring->req_prod = blk_ptbe_ring->resp_prod + = ptbe_resp_cons = ptbe_req_prod = 0; + blkif_pt_state = BLKIF_STATE_DISCONNECTED; + DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n"); + blkif_ptbe_send_interface_connect(); +} + +static void blkif_ptbe_connect(blkif_fe_interface_status_t *status) +{ + int err = 0; + + blkif_ptbe_evtchn = status->evtchn; + blkif_ptbe_irq = bind_evtchn_to_irq(blkif_ptbe_evtchn); + + err = request_irq(blkif_ptbe_irq, blkif_ptbe_int, + SA_SAMPLE_RANDOM, "blkif", NULL); + if ( err ) { + WPRINTK("blkfront request_irq failed (%d)\n", err); + return; + } else { + /* transtion to connected in case we need to do a + a partion probe on a whole disk */ + blkif_pt_state = BLKIF_STATE_CONNECTED; + } +} + +static void unexpected(blkif_fe_interface_status_t *status) +{ + WPRINTK(" TAP: Unexpected blkif status %s in state %s\n", + blkif_status_name[status->status], + blkif_state_name[blkif_pt_state]); +} + +static void blkif_ptbe_status( + blkif_fe_interface_status_t *status) +{ + if ( status->handle != 0 ) + { + DPRINTK("Status change on unsupported blkif %d\n", + status->handle); + return; + } + + DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]); + + switch ( status->status ) + { + case BLKIF_INTERFACE_STATUS_CLOSED: + switch ( blkif_pt_state ) + { + case BLKIF_STATE_CLOSED: + unexpected(status); + break; + 
case BLKIF_STATE_DISCONNECTED: + case BLKIF_STATE_CONNECTED: + unexpected(status); + blkif_ptbe_close(); + break; + } + break; + + case BLKIF_INTERFACE_STATUS_DISCONNECTED: + switch ( blkif_pt_state ) + { + case BLKIF_STATE_CLOSED: + blkif_ptbe_disconnect(); + break; + case BLKIF_STATE_DISCONNECTED: + case BLKIF_STATE_CONNECTED: + printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n"); + unexpected(status); + break; + } + break; + + case BLKIF_INTERFACE_STATUS_CONNECTED: + switch ( blkif_pt_state ) + { + case BLKIF_STATE_CLOSED: + unexpected(status); + blkif_ptbe_disconnect(); + blkif_ptbe_connect(status); + break; + case BLKIF_STATE_DISCONNECTED: + blkif_ptbe_connect(status); + break; + case BLKIF_STATE_CONNECTED: + unexpected(status); + blkif_ptbe_connect(status); + break; + } + break; + + case BLKIF_INTERFACE_STATUS_CHANGED: + switch ( blkif_pt_state ) + { + case BLKIF_STATE_CLOSED: + case BLKIF_STATE_DISCONNECTED: + unexpected(status); + break; + case BLKIF_STATE_CONNECTED: + /* vbd_update(); */ + /* tap doesn't really get state changes... 
*/ + unexpected(status); + break; + } + break; + + default: + DPRINTK("Status change to unknown value %d\n", status->status); + break; + } +} + +/*-----[ All control messages enter here: ]-------------------------------*/ + +void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) +{ + switch ( msg->type ) + { + case CMSG_BLKIF_FE: + + switch ( msg->subtype ) + { + case CMSG_BLKIF_FE_INTERFACE_STATUS: + if ( msg->length != sizeof(blkif_fe_interface_status_t) ) + goto parse_error; + blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]); + break; + + default: + goto parse_error; + } + + case CMSG_BLKIF_BE: + + switch ( msg->subtype ) + { + case CMSG_BLKIF_BE_CREATE: + if ( msg->length != sizeof(blkif_be_create_t) ) + goto parse_error; + blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]); + break; + case CMSG_BLKIF_BE_DESTROY: + if ( msg->length != sizeof(blkif_be_destroy_t) ) + goto parse_error; + blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]); + break; + case CMSG_BLKIF_BE_CONNECT: + if ( msg->length != sizeof(blkif_be_connect_t) ) + goto parse_error; + blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]); + break; + case CMSG_BLKIF_BE_DISCONNECT: + if ( msg->length != sizeof(blkif_be_disconnect_t) ) + goto parse_error; + blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0]); + break; + + /* We just ignore anything to do with vbds for now. 
*/ + + case CMSG_BLKIF_BE_VBD_CREATE: + DPRINTK("PT got VBD_CREATE\n"); + ((blkif_be_vbd_create_t *)&msg->msg[0])->status + = BLKIF_BE_STATUS_OKAY; + break; + case CMSG_BLKIF_BE_VBD_DESTROY: + DPRINTK("PT got VBD_DESTROY\n"); + ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status + = BLKIF_BE_STATUS_OKAY; + break; + case CMSG_BLKIF_BE_VBD_GROW: + DPRINTK("PT got VBD_GROW\n"); + ((blkif_be_vbd_grow_t *)&msg->msg[0])->status + = BLKIF_BE_STATUS_OKAY; + break; + case CMSG_BLKIF_BE_VBD_SHRINK: + DPRINTK("PT got VBD_SHRINK\n"); + ((blkif_be_vbd_shrink_t *)&msg->msg[0])->status + = BLKIF_BE_STATUS_OKAY; + break; + default: + goto parse_error; + } + } + + ctrl_if_send_response(msg); + return; + + parse_error: + msg->length = 0; + ctrl_if_send_response(msg); +} diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c new file mode 100644 index 0000000000..c8733dc088 --- /dev/null +++ b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c @@ -0,0 +1,517 @@ +/****************************************************************************** + * blktap_datapath.c + * + * XenLinux virtual block-device tap. + * Block request routing data path. + * + * Copyright (c) 2004, Andrew Warfield + * + */ + +#include "blktap.h" + +/*-----[ The data paths ]-------------------------------------------------*/ + +/* Connections to the frontend domains.*/ +blkif_t ptfe_blkif; + +/* Connection to a single backend domain. */ +blkif_ring_t *blk_ptbe_ring; /* Ring from the PT to the BE dom */ +BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */ +BLKIF_RING_IDX ptbe_req_prod; /* Private request producer. */ + +/* Rings up to user space. 
*/ +blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT; +blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT; + +/*-----[ Ring helpers ]---------------------------------------------------*/ + +inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring) +{ + if (ring->type == BLKIF_REQ_RING_TYPE) { + blkif_req_ring_t *r = (blkif_req_ring_t *)ring; + return ( ( r->req_prod - r->rsp_cons ) == BLKIF_RING_SIZE ); + } + + /* for now assume that there is always room in the response path. */ + return 0; +} + +/*-----[ Tracking active requests ]---------------------------------------*/ + +/* this must be the same as MAX_PENDING_REQS in blkback.c */ +#define MAX_ACTIVE_REQS 64 + +active_req_t active_reqs[MAX_ACTIVE_REQS]; +unsigned char active_req_ring[MAX_ACTIVE_REQS]; +spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED; +typedef unsigned int ACTIVE_RING_IDX; +ACTIVE_RING_IDX active_prod, active_cons; +#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1)) +#define ACTIVE_IDX(_ar) (_ar - active_reqs) + +inline active_req_t *get_active_req(void) +{ + ASSERT(active_cons != active_prod); + return &active_reqs[MASK_ACTIVE_IDX(active_cons++)]; +} + +inline void free_active_req(active_req_t *ar) +{ + unsigned long flags; + + spin_lock_irqsave(&active_req_lock, flags); + active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar); + spin_unlock_irqrestore(&active_req_lock, flags); +} + +inline void active_reqs_init(void) +{ + ACTIVE_RING_IDX i; + + active_cons = 0; + active_prod = MAX_ACTIVE_REQS; + memset(active_reqs, 0, sizeof(active_reqs)); + for ( i = 0; i < MAX_ACTIVE_REQS; i++ ) + active_req_ring[i] = i; +} + +/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/ + +irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs) +{ + /* we have pending messages from the real frontend. 
 */ + + blkif_request_t *req_s, *req_d; + BLKIF_RING_IDX fe_rp; + unsigned long flags; + int notify; + unsigned long i; + active_req_t *ar; + + DPRINTK("PT got FE interrupt.\n"); + + /* lock both rings */ + spin_lock_irqsave(&blkif_io_lock, flags); + + /* While there are REQUESTS on FERing: */ + fe_rp = ptfe_blkif.blk_ring_base->req_prod; + rmb(); + notify = (ptfe_blkif.blk_req_cons != fe_rp); + + for (i = ptfe_blkif.blk_req_cons; i != fe_rp; i++) { + + /* Get the next request */ + req_s = &ptfe_blkif.blk_ring_base->ring[MASK_BLKIF_IDX(i)].req; + + /* This is a new request: + * Assign an active request record, and remap the id. + */ + ar = get_active_req(); + ar->id = req_s->id; + req_s->id = ACTIVE_IDX(ar); + DPRINTK("%3lu < %3lu\n", req_s->id, ar->id); + + /* FE -> BE interposition point is here. */ + + /* ------------------------------------------------------------- */ + /* BLKIF_OP_PROBE_HACK: */ + /* Until we have grant tables, we need to allow the backend to */ + /* map pages that are either from this domain, or more commonly */ + /* from the real front end. We achieve this in a terrible way, */ + /* by passing the front end's domid along with PROBE messages */ + /* Once grant tables appear, this should all go away. */ + + if (req_s->operation == BLKIF_OP_PROBE) { + DPRINTK("Adding FE domid to PROBE request.\n"); + (domid_t)(req_s->frame_and_sects[1]) = ptfe_blkif.domid; + } + + /* ------------------------------------------------------------- */ + + /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */ + if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || + (blktap_mode & BLKTAP_MODE_COPY_FE) ) { + + /* Copy the request message to UFERing */ + /* In MODE_INTERCEPT_FE, map attached pages into the app vma */ + /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */ + + /* XXX: mapping/copying of attached pages is still not done! 
 */ + + DPRINTK("req->UFERing\n"); + blktap_write_fe_ring(req_s); + + + } + + /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */ + if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || + (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) { + + /* be included to prevent noise from the fe when it's off */ + /* copy the request message to the BERing */ + + DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", + (unsigned)MASK_BLKIF_IDX(i), + (unsigned)MASK_BLKIF_IDX(ptbe_req_prod)); + + req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req; + + memcpy(req_d, req_s, sizeof(blkif_request_t)); + + ptbe_req_prod++; + } + } + + ptfe_blkif.blk_req_cons = i; + + /* If we have forwarded any requests, notify the appropriate ends. */ + if (notify) { + + /* we have sent stuff to the be, notify it. */ + if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || + (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) { + wmb(); + blk_ptbe_ring->req_prod = ptbe_req_prod; + + notify_via_evtchn(blkif_ptbe_evtchn); + DPRINTK(" -- and notified.\n"); + } + + /* we sent stuff to the app, notify it. 
*/ + if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || + (blktap_mode & BLKTAP_MODE_COPY_FE) ) { + + blktap_kick_user(); + } + } + + /* unlock rings */ + spin_unlock_irqrestore(&blkif_io_lock, flags); + + return IRQ_HANDLED; +} + +inline int write_req_to_be_ring(blkif_request_t *req) +{ + blkif_request_t *req_d; + + req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req; + memcpy(req_d, req, sizeof(blkif_request_t)); + ptbe_req_prod++; + + return 0; +} + +inline void kick_be_domain(void) { + wmb(); + blk_ptbe_ring->req_prod = ptbe_req_prod; + notify_via_evtchn(blkif_ptbe_evtchn); +} + +/*-----[ Data to/from Backend (server) VM ]------------------------------*/ + + +irqreturn_t blkif_ptbe_int(int irq, void *dev_id, + struct pt_regs *ptregs) +{ + blkif_response_t *resp_s, *resp_d; + BLKIF_RING_IDX be_rp; + unsigned long flags; + int notify; + unsigned long i; + active_req_t *ar; + + DPRINTK("PT got BE interrupt.\n"); + + /* lock both rings */ + spin_lock_irqsave(&blkif_io_lock, flags); + + /* While there are RESPONSES on BERing: */ + be_rp = blk_ptbe_ring->resp_prod; + rmb(); + notify = (ptbe_resp_cons != be_rp); + + for ( i = ptbe_resp_cons; i != be_rp; i++ ) + { + /* BE -> FE interposition point is here. */ + + /* Get the next response */ + resp_s = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(i)].resp; + + + /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */ + if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || + (blktap_mode & BLKTAP_MODE_COPY_BE) ) { + + /* Copy the response message to UBERing */ + /* In MODE_INTERCEPT_BE, map attached pages into the app vma */ + /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */ + + /* XXX: copy/map the attached page! 
 */ + + DPRINTK("rsp->UBERing\n"); + blktap_write_be_ring(resp_s); + + } + + /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */ + if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || + (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) { + + /* (fe included to prevent random interference from the BE) */ + /* Copy the response message to FERing */ + + DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", + (unsigned) MASK_BLKIF_IDX(i), + (unsigned) MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)); + + /* remap id, and free the active req. blkif lookup goes here too.*/ + ar = &active_reqs[resp_s->id]; + DPRINTK("%3lu > %3lu\n", resp_s->id, ar->id); + resp_s->id = ar->id; + free_active_req(ar); + + resp_d = &ptfe_blkif.blk_ring_base->ring[ + MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp; + + memcpy(resp_d, resp_s, sizeof(blkif_response_t)); + + ptfe_blkif.blk_resp_prod++; + + } + } + + ptbe_resp_cons = i; + + /* If we have forwarded any responses, notify the appropriate domains. */ + if (notify) { + + /* we have sent stuff to the fe. notify it. */ + if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || + (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) { + wmb(); + ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod; + + notify_via_evtchn(ptfe_blkif.evtchn); + DPRINTK(" -- and notified.\n"); + } + + /* we sent stuff to the app, notify it. */ + if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || + (blktap_mode & BLKTAP_MODE_COPY_BE) ) { + + blktap_kick_user(); + } + } + + spin_unlock_irqrestore(&blkif_io_lock, flags); + return IRQ_HANDLED; +} + +inline int write_resp_to_fe_ring(blkif_response_t *rsp) +{ + blkif_response_t *resp_d; + active_req_t *ar; + + /* remap id, and free the active req. 
 blkif lookup goes here too.*/ + ar = &active_reqs[rsp->id]; + DPRINTK("%3lu > %3lu\n", rsp->id, ar->id); + rsp->id = ar->id; + free_active_req(ar); + + resp_d = &ptfe_blkif.blk_ring_base->ring[ + MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp; + + memcpy(resp_d, rsp, sizeof(blkif_response_t)); + ptfe_blkif.blk_resp_prod++; + + return 0; +} + +inline void kick_fe_domain(void) { + wmb(); + ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod; + notify_via_evtchn(ptfe_blkif.evtchn); + +} + +static inline void flush_requests(void) +{ + wmb(); /* Ensure that the frontend can see the requests. */ + blk_ptbe_ring->req_prod = ptbe_req_prod; + notify_via_evtchn(blkif_ptbe_evtchn); +} + +/*-----[ Data to/from user space ]----------------------------------------*/ + + +int blktap_write_fe_ring(blkif_request_t *req) +{ + blkif_request_t *target; + int error, i; + + /* + * This is called to pass a request from the real frontend domain's + * blkif ring to the character device. + */ + + if ( ! blktap_ring_ok ) { + DPRINTK("blktap: fe_ring not ready for a request!\n"); + return 0; + } + + if ( BLKTAP_RING_FULL(RING(&fe_ring)) ) { + DPRINTK("blktap: fe_ring is full, can't add.\n"); + return 0; + } + + target = &fe_ring.ring->ring[MASK_BLKIF_IDX(fe_ring.req_prod)].req; + memcpy(target, req, sizeof(*req)); + +/* maybe move this stuff out into a separate func ------------------- */ + + /* + * For now, map attached page into a fixed position into the vma. + * XXX: make this map to a free page. 
 + */ + + /* Attempt to map the foreign pages directly in to the application */ + for (i=0; i<req->nr_segments; i++) { + + /* get an unused virtual address from the char device */ + /* store the old page address */ + /* replace the address with the virtual address */ + + /* blktap_vma->vm_start+((2+i)*PAGE_SIZE) */ + + error = direct_remap_area_pages(blktap_vma->vm_mm, + MMAP_VADDR(req->id, i), + target->frame_and_sects[0] & PAGE_MASK, + PAGE_SIZE, + blktap_vma->vm_page_prot, + ptfe_blkif.domid); + if ( error != 0 ) { + printk(KERN_INFO "remapping attached page failed! (%d)\n", error); + return 0; + } + } + /* fix the address of the attached page in the message. */ + /* TODO: preserve the segment number stuff here... */ + /* target->frame_and_sects[0] = blktap_vma->vm_start + PAGE_SIZE;*/ +/* ------------------------------------------------------------------ */ + + + fe_ring.req_prod++; + + return 0; +} + +int blktap_write_be_ring(blkif_response_t *rsp) +{ + blkif_response_t *target; + + /* + * This is called to pass a response from the real backend domain's + * blkif ring to the character device. + */ + + if ( ! blktap_ring_ok ) { + DPRINTK("blktap: be_ring not ready for a request!\n"); + return 0; + } + + if ( BLKTAP_RING_FULL(RING(&be_ring)) ) { + DPRINTK("blktap: be_ring is full, can't add.\n"); + return 0; + } + + target = &be_ring.ring->ring[MASK_BLKIF_IDX(be_ring.rsp_prod)].resp; + memcpy(target, rsp, sizeof(*rsp)); + + + /* XXX: map attached pages and fix-up addresses in the copied address. */ + + be_ring.rsp_prod++; + + return 0; +} + +int blktap_read_fe_ring(void) +{ + /* This is called to read responses from the UFE ring. 
 */ + + BLKIF_RING_IDX fe_rp; + unsigned long i; + int notify; + + DPRINTK("blktap_read_fe_ring()\n"); + + fe_rp = fe_ring.ring->resp_prod; + rmb(); + notify = (fe_rp != fe_ring.rsp_cons); + + /* if we are forwarding from UFEring to FERing */ + if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) { + + /* for each outstanding message on the UFEring */ + for ( i = fe_ring.rsp_cons; i != fe_rp; i++ ) { + + /* XXX: remap pages on that message as necessary */ + /* copy the message to the FERing */ + + DPRINTK("resp->fe_ring\n"); + write_resp_to_fe_ring(&fe_ring.ring->ring[MASK_BLKIF_IDX(i)].resp); + } + + fe_ring.rsp_cons = fe_rp; + + /* notify the fe if necessary */ + if ( notify ) { + DPRINTK("kick_fe_domain()\n"); + kick_fe_domain(); + } + } + + return 0; +} + +int blktap_read_be_ring(void) +{ + /* This is called to read responses from the UBE ring. */ + + BLKIF_RING_IDX be_rp; + unsigned long i; + int notify; + + DPRINTK("blktap_read_be_ring()\n"); + + be_rp = be_ring.ring->req_prod; + rmb(); + notify = (be_rp != be_ring.req_cons); + + /* if we are forwarding from UBEring to BERing */ + if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) { + + /* for each outstanding message on the UBEring */ + for ( i = be_ring.req_cons; i != be_rp; i++ ) { + + /* XXX: remap pages on that message as necessary */ + /* copy the message to the BERing */ + + DPRINTK("req->be_ring\n"); + write_req_to_be_ring(&be_ring.ring->ring[MASK_BLKIF_IDX(i)].req); + } + + be_ring.req_cons = be_rp; + + /* notify the be if necessary */ + if ( notify ) { + DPRINTK("kick_be_domain()\n"); + kick_be_domain(); + } + } + + return 0; +} diff --git a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c new file mode 100644 index 0000000000..c10e3f3a44 --- /dev/null +++ b/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c @@ -0,0 +1,243 @@ +/****************************************************************************** + * blktap_userdev.c + * + * 
XenLinux virtual block-device tap. + * Control interface between the driver and a character device. + * + * Copyright (c) 2004, Andrew Warfield + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "blktap.h" + + +unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH; + +/* Only one process may open /dev/xen/blktap at any time. */ +static unsigned long blktap_dev_inuse; +unsigned long blktap_ring_ok; /* make this ring->state */ + +/* for poll: */ +static wait_queue_head_t blktap_wait; + +/* Where things are inside the device mapping. */ +struct vm_area_struct *blktap_vma; +unsigned long mmap_vstart; +unsigned long rings_vstart; + +/* -------[ blktap vm ops ]------------------------------------------- */ + +static struct page *blktap_nopage(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + /* + * if the page has not been mapped in by the driver then generate + * a SIGBUS to the domain. + */ + + force_sig(SIGBUS, current); + + return 0; +} + +struct vm_operations_struct blktap_vm_ops = { + nopage: blktap_nopage, +}; + +/* -------[ blktap file ops ]----------------------------------------- */ + +static int blktap_open(struct inode *inode, struct file *filp) +{ + if ( test_and_set_bit(0, &blktap_dev_inuse) ) + return -EBUSY; + + printk(KERN_ALERT "blktap open.\n"); + + /* Allocate the fe ring. */ + fe_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL); + if (fe_ring.ring == NULL) + goto fail_nomem; + + SetPageReserved(virt_to_page(fe_ring.ring)); + + fe_ring.ring->req_prod = fe_ring.ring->resp_prod + = fe_ring.req_prod + = fe_ring.rsp_cons + = 0; + + /* Allocate the be ring. 
*/ + be_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL); + if (be_ring.ring == NULL) + goto fail_free_fe; + + SetPageReserved(virt_to_page(be_ring.ring)); + + be_ring.ring->req_prod = be_ring.ring->resp_prod + = be_ring.rsp_prod + = be_ring.req_cons + = 0; + + DPRINTK(KERN_ALERT "blktap open.\n"); + + return 0; + + fail_free_fe: + free_page( (unsigned long) fe_ring.ring); + + fail_nomem: + return -ENOMEM; +} + +static int blktap_release(struct inode *inode, struct file *filp) +{ + blktap_dev_inuse = 0; + blktap_ring_ok = 0; + + printk(KERN_ALERT "blktap closed.\n"); + + /* Free the ring page. */ + ClearPageReserved(virt_to_page(fe_ring.ring)); + free_page((unsigned long) fe_ring.ring); + + ClearPageReserved(virt_to_page(be_ring.ring)); + free_page((unsigned long) be_ring.ring); + + return 0; +} + +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int size; + + printk(KERN_ALERT "blktap mmap (%lx, %lx)\n", + vma->vm_start, vma->vm_end); + + vma->vm_ops = &blktap_vm_ops; + + size = vma->vm_end - vma->vm_start; + if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) { + printk(KERN_INFO + "blktap: you _must_ map exactly %d pages!\n", + MMAP_PAGES + RING_PAGES); + return -EAGAIN; + } + + size >>= PAGE_SHIFT; + printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1); + + rings_vstart = vma->vm_start; + mmap_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT); + + /* Map the ring pages to the start of the region and reserve it. */ + + /* not sure if I really need to do this... 
*/ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + DPRINTK("Mapping be_ring page %lx.\n", __pa(be_ring.ring)); + if (remap_page_range(vma, vma->vm_start, __pa(be_ring.ring), PAGE_SIZE, + vma->vm_page_prot)) { + printk(KERN_ERR "be_ring: remap_page_range failure!\n"); + } + + DPRINTK("Mapping fe_ring page %lx.\n", __pa(fe_ring.ring)); + if (remap_page_range(vma, vma->vm_start + PAGE_SIZE, __pa(fe_ring.ring), + PAGE_SIZE, vma->vm_page_prot)) { + printk(KERN_ERR "fe_ring: remap_page_range failure!\n"); + } + + blktap_vma = vma; + blktap_ring_ok = 1; + + return 0; +} + +static int blktap_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + switch(cmd) { + case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */ + return blktap_read_fe_ring(); + + case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */ + return blktap_read_be_ring(); + + case BLKTAP_IOCTL_SETMODE: + if (BLKTAP_MODE_VALID(arg)) { + blktap_mode = arg; + /* XXX: may need to flush rings here. */ + printk(KERN_INFO "blktap: set mode to %lx\n", arg); + return 0; + } + /* XXX: return a more meaningful error case here. 
*/ + } + return -ENOIOCTLCMD; +} + +static unsigned int blktap_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &blktap_wait, wait); + + if ( (fe_ring.req_prod != fe_ring.ring->req_prod) || + (be_ring.rsp_prod != be_ring.ring->resp_prod) ) { + + fe_ring.ring->req_prod = fe_ring.req_prod; + be_ring.ring->resp_prod = be_ring.rsp_prod; + return POLLIN | POLLRDNORM; + } + + return 0; +} + +void blktap_kick_user(void) +{ + /* blktap_ring->req_prod = blktap_req_prod; */ + wake_up_interruptible(&blktap_wait); +} + +static struct file_operations blktap_fops = { + owner: THIS_MODULE, + poll: blktap_poll, + ioctl: blktap_ioctl, + open: blktap_open, + release: blktap_release, + mmap: blktap_mmap, +}; + +/* -------[ blktap module setup ]------------------------------------- */ + +static struct miscdevice blktap_miscdev = { + .minor = BLKTAP_MINOR, + .name = "blktap", + .fops = &blktap_fops, + .devfs_name = "misc/blktap", +}; + +int blktap_init(void) +{ + int err; + + err = misc_register(&blktap_miscdev); + if ( err != 0 ) + { + printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err); + return err; + } + + init_waitqueue_head(&blktap_wait); + + + return 0; +} -- 2.30.2